Updated 22.05.2024
working_dir <- "~/GEMMA/reviewed/MR_in"
exp_filename <- "iPSYCH_PGC_ASD_Nov2017__mr_in.tsv.gz"
out_filename <- "EA4_additive_excl_23andMe__mr_in.tsv.gz"
exp_name <- "ASD"
out_name <- "EA"
p_lim <- 0.05
# 5e-8 1e-5
r2_lim <- 0.2
library(TwoSampleMR)
## TwoSampleMR version 0.5.10
## [>] New: Option to use non-European LD reference panels for clumping etc
## [>] Some studies temporarily quarantined to verify effect allele
## [>] See news(package='TwoSampleMR') and https://gwas.mrcieu.ac.uk for further details
##
## Warning:
## You are running an old version of the TwoSampleMR package.
## This version: 0.5.10
## Latest version: 0.6.2
## Please consider updating using remotes::install_github('MRCIEU/TwoSampleMR')
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
setwd(working_dir)
Loading the exposure data and selecting significant SNPs
setwd(working_dir)
exp_raw_full <- fread(file =exp_filename)
exp_raw <- subset(exp_raw_full,exp_raw_full$pval<p_lim)
exp_raw <- as.data.frame(exp_raw)
exp_raw$phen <- rep(exp_name, nrow(exp_raw))
exp_dat <- format_data( exp_raw,
type = "exposure",
snp_col = "SNP",
beta_col = "beta",
se_col = "se",
effect_allele_col = "alt",
other_allele_col = "ref",
pval_col = "pval",
eaf_col = "freq",
phenotype_col = "phen",
samplesize_col = "n"
)
Clumping the exposure variables
library(ieugwasr)
##
## Warning:
## You are running an old version of the ieugwasr package.
## This version: 0.1.8
## Latest version: 1.0.0
## Please consider updating using remotes::install_github('MRCIEU/ieugwasr')
## OpenGWAS updates:
## Date: 2024-05-17
## [>] OpenGWAS is growing!
## [>] Please take 2 minutes to give us feedback -
## [>] It will help directly shape our emerging roadmap
## [>] https://forms.office.com/e/eSr7EFAfCG
##
## Attaching package: 'ieugwasr'
## The following object is masked from 'package:TwoSampleMR':
##
## ld_matrix
clumped_exp <- clump_data(exp_dat,clump_r2=r2_lim,pop="EAS")
## Please look at vignettes for options on running this locally if you need to run many instances of this command.
## Clumping mnKGEY, 523665 variants, using EAS population reference
## Removing 490766 of 523665 variants due to LD with other variants or absence from LD reference panel
!!! Clumping 6hkEUL, 351 variants, using EAS population reference Removing 346 of 351 variants due to LD with other variants or absence from LD reference panel
Outcome-data
setwd(working_dir)
out_raw_full <- fread(out_filename)
out_raw <- subset(out_raw_full,out_raw_full$pval<p_lim)
out_raw <- as.data.frame(out_raw)
out_raw$phen <- rep(out_name, nrow(out_raw))
out_dat <- format_data( out_raw,
type = "outcome",
snp_col = "SNP",
beta_col = "beta",
se_col = "se",
effect_allele_col = "alt",
other_allele_col = "ref",
pval_col = "pval",
eaf_col = "freq",
phenotype_col = "phen",
samplesize_col = "n"
)
Harmonizing data
harmonized_data <- harmonise_data(clumped_exp,out_dat,action=1)
## Harmonising ASD (mnKGEY) and EA (f6Isxc)
harmonized_data
res <- mr(harmonized_data)
## Analysing 'mnKGEY' on 'f6Isxc'
res
mr_pleiotropy_res <- mr_pleiotropy_test(harmonized_data)
mr_pleiotropy_res
res_single <- mr_singlesnp(harmonized_data)
res_single
res_loo <- mr_leaveoneout(harmonized_data)
res_loo
res <- mr(harmonized_data)
## Analysing 'mnKGEY' on 'f6Isxc'
p1 <- mr_scatter_plot(res, harmonized_data)
p1[[1]]
res_loo <- mr_leaveoneout(harmonized_data)
p3 <- mr_leaveoneout_plot(res_loo)
p3[[1]]
## Warning: Using linewidth for a discrete variable is not advised.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_errorbarh()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
res_single <- mr_singlesnp(harmonized_data)
p4 <- mr_funnel_plot(res_single)
p4[[1]]
harmonized_data$"r.outcome" <- get_r_from_lor(
harmonized_data$"beta.outcome",
harmonized_data$"eaf.outcome",
45383,
132032,
0.26,
model = "logit",
correction = FALSE
)
out <- directionality_test(harmonized_data)
## r.exposure and/or r.outcome not present.
## Calculating approximate SNP-exposure and/or SNP-outcome correlations, assuming all are quantitative traits. Please pre-calculate r.exposure and/or r.outcome using get_r_from_lor() for any binary traits
out
sc_plot <- mr_scatter_plot(res, harmonized_data)
sc_plot[[1]]
# Smallest outcome pval
lead_out_pval <- which(harmonized_data$pval.outcome == min(harmonized_data$pval.outcome) )
print("Lead outcome Pval:")
## [1] "Lead outcome Pval:"
harmonized_data$SNP[[lead_out_pval]]
## [1] "rs2388334"
harmonized_data$pval.outcome[[lead_out_pval]]
## [1] 1.927e-62
harmonized_data[lead_out_pval, ]
# Smallest exposure pval
lead_exp_pval <- which(harmonized_data$pval.exposure == min(harmonized_data$pval.exposure) )
print("Lead exposure Pval:")
## [1] "Lead exposure Pval:"
harmonized_data$SNP[[lead_exp_pval]]
## [1] "rs73128965"
harmonized_data$pval.exposure[[lead_exp_pval]]
## [1] 1.667e-08
harmonized_data[lead_exp_pval, ]
# Smallest pval for single SNP analysis
print("Lead single SNP Pval:")
## [1] "Lead single SNP Pval:"
n_max <- nrow(res_single) -2
lead_single_ind <- which(res_single$p == min(res_single$p[1:n_max]) )
res_single$SNP[[lead_single_ind]]
## [1] "rs2388334"
res_single$p[[lead_single_ind]]
## [1] 1.769462e-62
print(res_single[lead_single_ind, ])
## exposure outcome id.exposure id.outcome samplesize SNP b
## 2262 ASD EA mnKGEY f6Isxc 765000 rs2388334 0.4262871
## se p
## 2262 0.02555359 1.769462e-62